Gene Ontology Analysis analysis of LCM RNA Data

Managing Packages Using Renv

To run this code in my project using the renv environment, run the following lines of code

install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile

Load packages

require("ViSEAGO")
## Loading required package: ViSEAGO
## 
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: SparseM
## Warning: package 'SparseM' was built under R version 4.3.3
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
require("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate()   masks ViSEAGO::annotate()
## ✖ stringr::boundary()   masks graph::boundary()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select()       masks AnnotationDbi::select()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.3      forcats_1.0.0        stringr_1.5.1       
##  [4] dplyr_1.1.4          purrr_1.0.2          readr_2.1.5         
##  [7] tidyr_1.3.1          tibble_3.2.1         ggplot2_3.5.1       
## [10] tidyverse_2.0.0      topGO_2.54.0         SparseM_1.84-2      
## [13] GO.db_3.18.0         AnnotationDbi_1.64.1 IRanges_2.34.1      
## [16] S4Vectors_0.38.2     Biobase_2.60.0       graph_1.80.0        
## [19] BiocGenerics_0.46.0  ViSEAGO_1.16.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.17.0       jsonlite_1.8.9         
##   [4] magrittr_2.0.3          rmarkdown_2.28          fs_1.6.4               
##   [7] zlibbioc_1.46.0         vctrs_0.6.5             memoise_2.0.1          
##  [10] RCurl_1.98-1.16         webshot_0.5.5           htmltools_0.5.8.1      
##  [13] progress_1.2.3          dynamicTreeCut_1.63-1   curl_5.2.3             
##  [16] sass_0.4.9              bslib_0.8.0             htmlwidgets_1.6.4      
##  [19] plyr_1.8.9              plotly_4.10.4           cachem_1.1.0           
##  [22] igraph_2.1.2            lifecycle_1.0.4         iterators_1.0.14       
##  [25] pkgconfig_2.0.3         Matrix_1.6-5            R6_2.5.1               
##  [28] fastmap_1.2.0           GenomeInfoDbData_1.2.10 digest_0.6.37          
##  [31] colorspace_2.1-1        RSQLite_2.3.9           seriation_1.5.7        
##  [34] filelock_1.0.3          timechange_0.3.0        fansi_1.0.6            
##  [37] httr_1.4.7              compiler_4.3.2          bit64_4.5.2            
##  [40] withr_3.0.1             BiocParallel_1.34.2     viridis_0.6.5          
##  [43] DBI_1.2.3               UpSetR_1.4.0            heatmaply_1.5.0        
##  [46] dendextend_1.19.0       R.utils_2.12.3          biomaRt_2.58.2         
##  [49] rappdirs_0.3.3          tools_4.3.2             R.oo_1.27.0            
##  [52] glue_1.8.0              DiagrammeR_1.0.11       GOSemSim_2.28.1        
##  [55] grid_4.3.2              fgsea_1.28.0            generics_0.1.3         
##  [58] gtable_0.3.5            tzdb_0.4.0              R.methodsS3_1.8.2      
##  [61] ca_0.71.1               data.table_1.16.2       hms_1.1.3              
##  [64] xml2_1.3.6              utf8_1.2.4              XVector_0.40.0         
##  [67] foreach_1.5.2           pillar_1.9.0            yulab.utils_0.1.9      
##  [70] BiocFileCache_2.10.2    lattice_0.22-6          renv_1.0.11            
##  [73] bit_4.5.0               tidyselect_1.2.1        registry_0.5-1         
##  [76] Biostrings_2.70.3       knitr_1.48              gridExtra_2.3          
##  [79] xfun_0.48               matrixStats_1.4.1       DT_0.33                
##  [82] visNetwork_2.1.2        stringi_1.8.4           lazyeval_0.2.2         
##  [85] yaml_2.3.10             evaluate_1.0.1          codetools_0.2-20       
##  [88] BiocManager_1.30.25     cli_3.6.3               munsell_0.5.1          
##  [91] jquerylib_0.1.4         Rcpp_1.0.13-1           GenomeInfoDb_1.36.4    
##  [94] dbplyr_2.5.0            png_0.1-8               XML_3.99-0.17          
##  [97] parallel_4.3.2          assertthat_0.2.1        blob_1.2.4             
## [100] prettyunits_1.2.0       AnnotationForge_1.44.0  bitops_1.0-9           
## [103] viridisLite_0.4.2       scales_1.3.0            crayon_1.5.3           
## [106] rlang_1.1.4             cowplot_1.1.3           fastmatch_1.1-6        
## [109] KEGGREST_1.40.1         TSP_1.2-4

Description of pipeline

I am going to perform functional enrichment of GO terms using ViSEAGO.

I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.

Load in reference files and differential expression data

In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Aboral tissue, and positive LFC = higher in Oral tissue.

#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")

#make dataframes of just differentially expressed genes for each LFC direction
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 2)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < -2)

#load in annotation data 
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)

#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query) 

annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))

nrow(annot_tab)
## [1] 10638
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7354812

10638/14464 genes in our dataset have GO information in this file. That is 74%.

sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 278
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6797066
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 546
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6275862

545/804 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.

1979/2802 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.

Create custom GO annotation file for ViSEAGO

##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs) %>% dplyr::rename("GO.terms" = GOs)

# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
  # Separate GO terms into individual rows
  separate_rows(GO.terms, sep = ";") %>%
  # Add necessary columns
  mutate(
    taxid = "pacuta",
    gene_symbol = query,
    evidence = "SwissProt"
  ) %>%
  # Rename columns
  dplyr::rename(
    gene_id = query,
    GOID = GO.terms
  ) %>%
  dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)

Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))

write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)

length(unique(Custom_GOs$gene_id))
## [1] 10638
length(unique(Custom_GOs_valid$gene_id))
## [1] 10637

We seem to have lost one gene when filtering for valid GO terms, so I need to aMFount for that below.

load the file into ViSEAGO

Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
    id="pacuta",
    Custom_Pacuta
)

Create gene lists for enrichment

selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>% 
                       mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
                       mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
                       mutate(expressed = 1)

selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)

selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)

expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)

Oral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)

BP_Oral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="BP",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 8920 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 12838 GO terms and 28523 relations. )
## 
## Annotating nodes ...............
##  ( 9534 genes annotated to the GO terms. )
# perform TopGO test using classic algorithm
classic_Oral <- topGO::runTest(
    BP_Oral,
    algorithm ="classic",
    statistic = "fisher"
)
## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 3938 nontrivial nodes
##       parameters: 
##           test statistic: fisher
BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Oral = c("BP_Oral", "classic_Oral"))
)
## 'select()' returned 1:1 mapping between keys and columns
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Oral
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 546
##         feasible_genes: 9534
##         feasible_genes_significant: 472
##         genes_nodeSize: 5
##         nodes_number: 6852
##         edges_number: 14732
##       classic_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher
##         algorithm_name: classic
##         GO_scored: 6852
##         GO_significant: 126
##         feasible_genes: 9534
##         feasible_genes_significant: 472
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3938 
##  - enrichment pvalue cutoff:
##         Oral : 0.01
## - enrich GOs (in at least one list): 126 GO terms of 1 conditions.
##         Oral : 126 terms

Visualize and save initial results

# display the merged table
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_Fisher.csv"
)

Semantic similarity

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
# compute all available Semantic Similarity (SS) measures
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
    distance="Wang"
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 546
##         feasible_genes: 9534
##         feasible_genes_significant: 472
##         genes_nodeSize: 5
##         nodes_number: 6852
##         edges_number: 14732
##       classic_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher
##         algorithm_name: classic
##         GO_scored: 6852
##         GO_significant: 126
##         feasible_genes: 9534
##         feasible_genes_significant: 472
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3938 
##  - enrichment pvalue cutoff:
##         Oral : 0.01
## - enrich GOs (in at least one list): 126 GO terms of 1 conditions.
##         Oral : 126 terms
## - terms distances:  Wang

Visualization

Multi Dimensional Scaling

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap

# GOterms heatmap with the default parameters
Wang_clusters_wardD2<-ViSEAGO::GOterms_heatmap(
    myGOs,
    showIC=TRUE,
    showGOlabels=TRUE,
    GO.tree=list(
        tree=list(
            distance="Wang",
            aggreg.method="ward.D2"
        ),
        cut=list(
            dynamic=list(
                pamStage=TRUE,
                pamRespectsDendro=TRUE,
                deepSplit=2,
                minClusterSize =2
            )
        )
    ),
    samples.tree=NULL
)
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms"
)
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.csv"
)

MDS

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOterms")

Visualization and interpretation of GO clusters

# calculate semantic similarites between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# build and highlight in an interactive MDSplot grouped clusters for one distance object
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOclusters")

Aboral Tissue:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)

BP_Aboral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="BP",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 8920 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 12838 GO terms and 28523 relations. )
## 
## Annotating nodes ...............
##  ( 9534 genes annotated to the GO terms. )
# perform TopGO test using classic algorithm
classic_Aboral <- topGO::runTest(
    BP_Aboral,
    algorithm ="classic",
    statistic = "fisher"
)
## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 2859 nontrivial nodes
##       parameters: 
##           test statistic: fisher
BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Aboral = c("BP_Aboral", "classic_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Aboral
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 278
##         feasible_genes: 9534
##         feasible_genes_significant: 243
##         genes_nodeSize: 5
##         nodes_number: 6852
##         edges_number: 14732
##       classic_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher
##         algorithm_name: classic
##         GO_scored: 6852
##         GO_significant: 336
##         feasible_genes: 9534
##         feasible_genes_significant: 243
##         genes_nodeSize: 5
##         Nontrivial_nodes: 2859 
##  - enrichment pvalue cutoff:
##         Aboral : 0.01
## - enrich GOs (in at least one list): 336 GO terms of 1 conditions.
##         Aboral : 336 terms

Visualize and save initial results

# display the merged table
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_Fisher.csv"
)

Semantic similarity

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
# compute all available Semantic Similarity (SS) measures
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
    distance="Wang"
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 278
##         feasible_genes: 9534
##         feasible_genes_significant: 243
##         genes_nodeSize: 5
##         nodes_number: 6852
##         edges_number: 14732
##       classic_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher
##         algorithm_name: classic
##         GO_scored: 6852
##         GO_significant: 336
##         feasible_genes: 9534
##         feasible_genes_significant: 243
##         genes_nodeSize: 5
##         Nontrivial_nodes: 2859 
##  - enrichment pvalue cutoff:
##         Aboral : 0.01
## - enrich GOs (in at least one list): 336 GO terms of 1 conditions.
##         Aboral : 336 terms
## - terms distances:  Wang

Visualization

Multi Dimensional Scaling

# display MDSplot
ViSEAGO::MDSplot(myGOs,"GOterms")

Heatmap

# GOterms heatmap with the default parameters
Wang_clusters_wardD2<-ViSEAGO::GOterms_heatmap(
    myGOs,
    showIC=TRUE,
    showGOlabels=TRUE,
    GO.tree=list(
        tree=list(
            distance="Wang",
            aggreg.method="ward.D2"
        ),
        cut=list(
            dynamic=list(
                pamStage=TRUE,
                pamRespectsDendro=TRUE,
                deepSplit=2,
                minClusterSize =2
            )
        )
    ),
    samples.tree=NULL
)
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms"
)
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.csv"
)

MDS

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOterms")

Visualization and interpretation of GO clusters

# calculate semantic similarites between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# build and highlight in an interactive MDSplot grouped clusters for one distance object
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOclusters")

clusterprofiler

library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.3.3
## clusterProfiler v4.10.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
library(enrichplot)
library(GO.db)

DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 1)

# Create TERM2GENE data.frame
TERM2GENE <- Custom_GOs_valid %>%
  dplyr::select(GOID, gene_id) %>%
  dplyr::distinct()  # remove potential duplicates

gene_list_aboral <- DE_05_Aboral$query

ego_aboral <- enricher(
  gene = gene_list_aboral,
  TERM2GENE = TERM2GENE,
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)

ego_aboral@result$Description <- Term(ego_aboral@result$ID)

dotplot(ego_aboral, showCategory = 20)

barplot(ego_aboral, showCategory = 20, title = "GO Barplot (Aboral)")

# Clean ego_aboral@result$geneID
ego_aboral@result$geneID <- gsub("Pocillopora_acuta_HIv2___", "", ego_aboral@result$geneID)
ego_aboral@result$geneID <- gsub("\\.t1[a-z]*", "", ego_aboral@result$geneID)

treeplot(pairwise_termsim(ego_aboral))
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.

########

DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < 0)
gene_list_oral <- DE_05_OralEpi$query

ego_oral <- enricher(
  gene = gene_list_oral,
  pAdjustMethod="none",
  TERM2GENE = TERM2GENE,
  pvalueCutoff = 0.05,
  qvalueCutoff = 1
)
ego_oral
## #
## # over-representation test
## #
## #...@organism     UNKNOWN 
## #...@ontology     UNKNOWN 
## #...@gene     chr [1:2802] "Pocillopora_acuta_HIv2___RNAseq.g6351.t1" ...
## #...pvalues adjusted by 'none' with cutoff <0.05 
## #...108 enriched terms found
## 'data.frame':    108 obs. of  9 variables:
##  $ ID         : chr  "GO:1902495" "GO:0045211" "GO:0008593" "GO:1902476" ...
##  $ Description: chr  "GO:1902495" "GO:0045211" "GO:0008593" "GO:1902476" ...
##  $ GeneRatio  : chr  "11/1978" "44/1978" "11/1978" "15/1978" ...
##  $ BgRatio    : chr  "14/10637" "123/10637" "17/10637" "32/10637" ...
##  $ pvalue     : num  1.87e-06 4.57e-06 3.66e-05 2.40e-04 2.63e-04 ...
##  $ p.adjust   : num  1.87e-06 4.57e-06 3.66e-05 2.40e-04 2.63e-04 ...
##  $ qvalue     : num  0.00423 0.00515 0.02753 0.10955 0.10955 ...
##  $ geneID     : chr  "Pocillopora_acuta_HIv2___RNAseq.g28741.t1/Pocillopora_acuta_HIv2___RNAseq.g21572.t1a/Pocillopora_acuta_HIv2___R"| __truncated__ "Pocillopora_acuta_HIv2___RNAseq.g5252.t1/Pocillopora_acuta_HIv2___RNAseq.g28202.t1/Pocillopora_acuta_HIv2___RNA"| __truncated__ "Pocillopora_acuta_HIv2___RNAseq.g7241.t1/Pocillopora_acuta_HIv2___RNAseq.g7243.t1/Pocillopora_acuta_HIv2___RNAs"| __truncated__ "Pocillopora_acuta_HIv2___RNAseq.g19042.t1/Pocillopora_acuta_HIv2___RNAseq.g14871.t1/Pocillopora_acuta_HIv2___RN"| __truncated__ ...
##  $ Count      : int  11 44 11 15 14 19 32 17 8 8 ...
## #...Citation
##  T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
##  clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
##  The Innovation. 2021, 2(3):100141
ego_oral@result$Description <- Term(ego_oral@result$ID)

upsetplot(ego_oral)

dotplot(ego_oral, showCategory = 20)

barplot(ego_oral, showCategory = 20, title = "GO Barplot (oral)")

# Clean ego_oral@result$geneID
ego_oral@result$geneID <- gsub("Pocillopora_acuta_HIv2___", "", ego_oral@result$geneID)
ego_oral@result$geneID <- gsub("\\.t1[a-z]*", "", ego_oral@result$geneID)

treeplot(pairwise_termsim(ego_oral), hclust_method = "ward.D2",nwords=2)
## Warning in treeplot.enrichResult(x, ...): Use 'cluster.params = list(method = your_value)' instead of 'hclust_method'.
##  The hclust_method parameter will be removed in the next version.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.